1 Preparatory steps for the Markdown file

Setting the options for knitr:

library(knitr)
# Global chunk options for the whole document.
# BUG FIX: `cache` was previously supplied twice (FALSE, then TRUE later in
# the same call). With duplicated names it is ambiguous which value wins, and
# the cache could be silently disabled. Since cache.path and autodep are
# configured, caching is clearly intended, so a single `cache = TRUE` is kept.
knitr::opts_chunk$set(echo = TRUE,
                      comment = NA,
                      prompt  = FALSE,
                      warning = FALSE,
                      message = FALSE,
                      fig.align = "center",
                      fig.width = 8.125,
                      out.width = "100%",
                      fig.path = "D:/figures/eL2/72dpi/",
                      dev = c('png', 'tiff'),
                      dev.args = list(png = list(type = "cairo"), tiff = list(compression = 'lzw')),
                      dpi = 72,
                      cache = TRUE,
                      cache.path = "D:/cache/eL2/",
                      autodep = TRUE)
# Wide console output, no scientific notation, blank cells for NA in kable.
options(width = 1000, scipen = 999, knitr.kable.NA = '')

Setting the options for summarytools:

library(summarytools)
# Recommended summarytools settings for R Markdown documents.
st_options(
  plain.ascii       = FALSE,        # required in Rmd documents
  style             = "rmarkdown",  # required in Rmd documents
  footnote          = NA,           # makes html-rendered results more concise
  subtitle.emphasis = FALSE         # improves layout with some rmarkdown themes
)
# Inject the summarytools CSS into the rendered document.
st_css()

Loading libraries:

# Package loading. NOTE(review): the order matters for masking — MASS is
# attached after tidyverse, so MASS::select masks dplyr::select; this is why
# dplyr::select() is called with an explicit namespace later in the document.
library(tidyverse)
library(ggplot2) # graphics (also attached by tidyverse; kept for explicitness)
library(MASS) # for stdres
library(lme4) # glmer
library(lmerTest) # p-values in the summary() and anova() functions
library(performance) # assess multicollinearity in regression models
library(gridExtra) # arranging multiple plots
library(kableExtra) # formatted tables
library(emmeans) # multiple comparisons and plotting effects
library(car) # for Levene's test
library(FSA) # for Dunn's test
library(rcompanion) # for Glass rank biserial correlation coefficients
library(stats) # for Fligner-Killeen's test (attached by default in base R)
library(DHARMa) # simulate residuals
library(sjPlot) # plotting effects
library(glmmTMB) # alternative mixed-model fitting
library(stargazer) # regression tables

Cleaning the memory:

# Clear the workspace, including hidden (dot-prefixed) objects.
# NOTE(review): rm(list = ls()) is generally discouraged in scripts; in a
# knitted document each render starts from a fresh session anyway.
rm(list = ls(all.names = TRUE))

Specifying a seed for random number generation, for the sake of reproducibility:

# Fix the RNG seed so that stochastic steps below (e.g. geom_jitter positions,
# bootstrap confidence intervals) are reproducible across renders.
set.seed(123)

2 Preparing the data

2.1 Main dataset

Loading the data and building a tibble:

# Read the tab-separated main data file and store it as a tibble.
df <- read.table("data.txt", sep = "\t", dec = ".", header = TRUE) %>%
  as_tibble()

Transforming categorical variables (code, L1, nonword, sex, coda_l, final_l, branching_onset, repetition) into factors:

# Convert all categorical variables to factors in a single step with across()
# instead of eight separate as.factor() calls (same result, less repetition).
df <- df %>%
  mutate(across(c(code, L1, nonword, sex, coda_l, final_l,
                  branching_onset, repetition),
                as.factor))

Given that we only have 1, 2 or 3 vowels for the nonwords, we will consider V as a categorical variable and therefore recode it as a factor:

# V only takes the values 1, 2 or 3, so we treat it as categorical.
df <- mutate(df, V = as.factor(V))

We also rename the column code into subject and turn it into a factor:

# Rename `code` to `subject`; the column is already a factor (see above),
# the as.factor() call just makes this explicit.
df <- rename(df, subject = code)
df <- mutate(df, subject = as.factor(subject))

We need to be careful and note that although we have two categorical variables coda_l (whether l appears in internal coda position in the nonword) and final_l (whether l appears in final position in the nonword), we do not have 4 different configurations. Indeed, there is no word in which l appears both in internal coda position and in final position, as shown below:

# Cross-tabulate the unique nonwords by the position of /l/ (internal coda
# vs final) to show that the two configurations never co-occur.
nonword_l_positions <- df %>%
  dplyr::select(nonword, coda_l, final_l) %>%
  distinct()
my_table <- as.data.frame(with(nonword_l_positions, table(coda_l, final_l)))
colnames(my_table) <- c("l in internal coda position", "l in final position", "# nonwords")
my_table
  l in internal coda position l in final position # nonwords
1                           0                   0         60
2                           1                   0          4
3                           0                   1          7
4                           1                   1          0

Rather than working with coda_l and final_l, we compute a new categorical variable, occurrence_l, with 3 levels: coda for l appearing in internal coda position, final for l appearing in final position and other for nonwords without l in final or coda position (either with l in another position or without l):

# Recode coda_l/final_l into a single 3-level factor. case_when() is clearer
# than the original nested if_else() calls: the first matching condition wins,
# so `coda_l == 1` takes precedence over `final_l == 1` (the two never
# co-occur anyway, as shown in the table above).
df <- df %>%
  mutate(occurrence_l = case_when(coda_l == 1 ~ "coda",
                                  final_l == 1 ~ "final",
                                  TRUE ~ "other"),
         occurrence_l = as.factor(occurrence_l))

We can further observe the distribution of nonwords with respect to the intersections of the variables occurrence_l, V and branching_onset (the number of branching onsets in a nonword).

We first create a data table for nonwords:

# One row per nonword with its structural descriptors.
df_nonwords <- df %>%
  dplyr::select(nonword, occurrence_l, coda_l, final_l, branching_onset, V) %>%
  distinct()

We then inspect several configurations:

# Nonword counts by occurrence of /l/ and number of branching onsets.
with(df_nonwords, table(occurrence_l, branching_onset, dnn = list("occurrence of l", "  branching onset")))
                 branching onset
occurrence of l  0  1  2
          coda   4  0  0
          final  5  2  0
          other 33 25  2
# Nonword counts by occurrence of /l/ and number of vowels.
with(df_nonwords, table(occurrence_l, V, dnn = list("occurrence of l", "  # of vowels")))
                 # of vowels
occurrence of l  1  2  3
          coda   0  2  2
          final  3  2  2
          other 23 21 16
# Nonword counts by number of vowels and number of branching onsets.
with(df_nonwords, table(V, branching_onset, dnn = list("# of vowels", "  branching onset")))
             branching onset
# of vowels  0  1  2
          1 12 14  0
          2 16  7  2
          3 14  6  0

2.2 Additional data about the subjects’ performance

To draw some of the figures of the paper, we need an additional table of information about the subjects’ performances. These data are participant-based rather than item-based.

# Participant-level performance data (one row per child).
df_add_data <- as_tibble(read.table("additional_data.txt", header = TRUE, sep = "\t"))
# Render an html summary of the table.
print(dfSummary(df_add_data), method = 'render', footnote = NA)

Data Frame Summary

df_add_data
Dimensions: 62 x 9
Duplicates: 0
No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
1 Participant [character]
1. EANA-A1
2. EANA-A2
3. EANA-A3
4. EANA-A4
5. EANA-A5
6. EANA-A6
7. EANA-A7
8. EANA-A8
9. EANA-A9
10. EANA-AL1
[ 52 others ]
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
1(1.6%)
52(83.9%)
62 (100.0%) 0 (0.0%)
2 NWR_pc_Total [numeric]
Mean (sd) : 0.8 (0.1)
min ≤ med ≤ max:
0.4 ≤ 0.8 ≤ 1
IQR (CV) : 0.2 (0.2)
28 distinct values 62 (100.0%) 0 (0.0%)
3 Coda_l_pc [numeric]
Mean (sd) : 0.8 (0.3)
min ≤ med ≤ max:
0 ≤ 0.9 ≤ 1
IQR (CV) : 0.5 (0.4)
0.00:4(6.5%)
0.25:4(6.5%)
0.50:10(16.1%)
0.75:13(21.0%)
1.00:31(50.0%)
62 (100.0%) 0 (0.0%)
4 codas_pc [numeric]
Mean (sd) : 0.9 (0.1)
min ≤ med ≤ max:
0.6 ≤ 0.9 ≤ 1
IQR (CV) : 0.1 (0.1)
0.56 !:1(1.6%)
0.62 !:4(6.5%)
0.69 !:1(1.6%)
0.75  :1(1.6%)
0.81 !:7(11.3%)
0.88 !:11(17.7%)
0.94 !:9(14.5%)
1.00  :28(45.2%)
! rounded
62 (100.0%) 0 (0.0%)
5 Branching_onset_pc [numeric]
Mean (sd) : 0.8 (0.1)
min ≤ med ≤ max:
0.5 ≤ 0.9 ≤ 1
IQR (CV) : 0.1 (0.1)
14 distinct values 62 (100.0%) 0 (0.0%)
6 sC_pc [numeric]
Mean (sd) : 1 (0.2)
min ≤ med ≤ max:
0 ≤ 1 ≤ 1
IQR (CV) : 0 (0.2)
0.00  :1(1.6%)
0.56 !:2(3.2%)
0.78 !:2(3.2%)
0.89 !:4(6.5%)
1.00  :53(85.5%)
! rounded
62 (100.0%) 0 (0.0%)
7 Final_l_pc [numeric]
Mean (sd) : 0.7 (0.3)
min ≤ med ≤ max:
0 ≤ 0.9 ≤ 1
IQR (CV) : 0.4 (0.4)
0.00  :1(1.6%)
0.14 !:4(6.5%)
0.29 !:3(4.8%)
0.43 !:6(9.7%)
0.57 !:7(11.3%)
0.71 !:9(14.5%)
0.86 !:10(16.1%)
1.00  :22(35.5%)
! rounded
62 (100.0%) 0 (0.0%)
8 PVC [integer]
Mean (sd) : 93.2 (11.4)
min ≤ med ≤ max:
9 ≤ 95 ≤ 100
IQR (CV) : 4 (0.1)
16 distinct values 62 (100.0%) 0 (0.0%)
9 PCC [integer]
Mean (sd) : 92.5 (4.7)
min ≤ med ≤ max:
82 ≤ 94 ≤ 99
IQR (CV) : 8 (0.1)
17 distinct values 61 (98.4%) 1 (1.6%)

(pc means percentage correct)

2.3 Additional data about L1 syllabic complexity

The data introduced in this subsection come from the Lapsyd database (Maddieson et al., 2013, Proc. of the 14th Annual Conf. of the Intl. Speech Communication Association (Interspeech 2013), pp. 3022-3026). Among other features, this database describes, for most languages, the complexity of the onset, the nucleus and the coda of their syllables with scores from 0 (least complex) to 3 (most complex), with 1 being the default score.

Loading the data and building a tibble:

# Load the Lapsyd-derived L1 syllabic-complexity scores.
# FIX: spell out TRUE instead of the reassignable shorthand T.
df_L1 <- read.table("Data on the syllabic complexity of L1.txt", sep = "\t", dec = ".",
                    header = TRUE, stringsAsFactors = TRUE) %>%
  as_tibble()

We have data regarding the complexity of the syllables for all the languages of our study but Kabyle:

df_L1
# A tibble: 17 × 2
   Language             L1_syll_complexity
   <fct>                             <int>
 1 Arabic                                4
 2 Arabic/Italian                        6
 3 Italian                               6
 4 Portuguese                            5
 5 Mandarin                              4
 6 Ukrainian/Russian                     7
 7 Romanian                              7
 8 Albanian                              7
 9 Brazilian Portuguese                  5
10 Armenian                              6
11 Russian/Armenian                      7
12 Japanese                              4
13 Georgian/German                       8
14 Russian/Chechen                       8
15 Russian                               7
16 Arabic/Spanish                        5
17 Spanish                               5

We integrate these data to our main dataset:

# Attach the L1 syllabic-complexity score to each observation
# (Kabyle speakers get NA, handled later).
df <- left_join(df, df_L1, by = c("L1" = "Language"))

3 Description of the problems and of the data

3.1 Description of the investigation and of the variables

Our experiment consists in presenting French nonwords to children acquiring this language as an L2, and assess their ability to repeat these target nonwords. We investigate which parameters may explain success or failure at this task. To this end, our statistical approach rests on a (valid) mixed-effects logistic regression model.

The dependent variable of our model is repetition (0 for failure to repeat, 1 for success)

As for the meaningful independent variables, we consider:

  • occurrence_l (whether l appears in coda or final position) (‘coda’ if l appears in internal coda position, ‘final’ if l appears in final position, ‘other’ for other configurations)
  • branching_onset (number of branching onsets in the nonword) (0, 1 or 2)
  • V (number of vowels of the nonword) (1, 2 or 3)
  • rec_voc (size of a child’s French receptive vocabulary)
  • verbal_stm (size of a child’s verbal short-term memory)
  • phono_awareness (z-scored score reflecting the child’s phonological awareness)
  • L1_syll_complexity (overall complexity of the syllabic structures in the child’s L1)
  • LoE (length of a child’s exposure to French, in days)
  • age (child’s age, in months)

We are not only interested in main (fixed) effects, but also in the possible interactions between the previous predictors. We do not consider triple (or even higher-order) interactions for the sake of clarity, and thus focus on simple (i.e., ‘double’ interactions). Even with this approach, however, there are 21 possible interactions given that we have 7 primary predictors (7!/(5!2!)). Rather than considering all possible options and get lost in backward and forward stepwise regression, we choose to investigate only a number of ‘target’ interactions, which we deem meaningful for our study and given our expertise in the domain. These interactions are the following:

  • occurrence_l * LoE
  • branching_onset * LoE
  • rec_voc * LoE
  • age * verbal_stm
  • age * rec_voc
  • verbal_stm * rec_voc

We finally consider a number of random effects to capture the non-independence of our observations:

  • subject (with 62 different levels - one per subject)
  • L1 (with 18 different levels, covering both monolingual and bilingual subjects)
  • nonword (with 71 different levels - one per nonword)

subject is nested into L1, but since there is no ambiguity (i.e. two subjects with the same name/code speaking different languages), nesting subject into L1 explicitly in the model, i.e., (1|L1/subject), is similar to considering crossed random effects, i.e. (1|L1) + (1|subject).

We can additionally formulate precise oriented hypotheses to be tested for the main effects (all other things being equal):

  • Regarding occurrence_l: a nonword gets easier to repeat when shifting from l in coda position to l in final position to another structure
  • Regarding branching_onset: the more branching onsets a nonword has, the more difficult it is to repeat
  • Regarding V: following the literature, a nonword becomes more difficult to repeat when it has 3 vowels (no difference between 1 and 2 vowels)
  • Regarding, rec_voc: the larger a child’s French receptive vocabulary, the easier it is for them to repeat the nonwords
  • Regarding verbal_stm: the larger a child’s verbal short-term memory, the easier it is for them to repeat the nonwords
  • Regarding phono_awareness: the more developed the children’s phonological awareness, the easier it is for them to repeat the nonwords
  • Regarding L1_syll_complexity: the more complex the syllables can be in the children’s L1, the easier it is for them to repeat the nonwords in French
  • Regarding LoE: the longer the exposure to French, the easier it is for them to repeat the nonwords
  • Regarding age: the older the children, the easier it is for them to repeat the nonwords

The hypotheses for Onset_complexity and Coda_complexity derive from the fact that both onset and coda can be complex in French - with scores of 3 for both of them in Lapsyd.

3.2 Overview of the observations

There are 4402 observations.

We can inspect the distributions of values for the different variables:

# Render an html summary of the full item-level dataset.
print(dfSummary(df), method = 'render', footnote = NA)

Data Frame Summary

df
Dimensions: 4402 x 18
Duplicates: 0
No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
1 subject [factor]
1. EANA-A1
2. EANA-A2
3. EANA-A3
4. EANA-A4
5. EANA-A5
6. EANA-A6
7. EANA-A7
8. EANA-A8
9. EANA-A9
10. EANA-AL1
[ 52 others ]
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
71(1.6%)
3692(83.9%)
4402 (100.0%) 0 (0.0%)
2 sex [factor]
1. F
2. M
1704(38.7%)
2698(61.3%)
4402 (100.0%) 0 (0.0%)
3 L1 [factor]
1. Albanian
2. Arabic
3. Arabic/Italian
4. Arabic/Spanish
5. Armenian
6. Brazilian Portuguese
7. Georgian/German
8. Italian
9. Japanese
10. Kabyle
[ 8 others ]
426(9.7%)
639(14.5%)
852(19.4%)
71(1.6%)
213(4.8%)
284(6.5%)
71(1.6%)
355(8.1%)
142(3.2%)
71(1.6%)
1278(29.0%)
4402 (100.0%) 0 (0.0%)
4 age [integer]
Mean (sd) : 89.6 (10)
min ≤ med ≤ max:
72 ≤ 89.5 ≤ 109
IQR (CV) : 17 (0.1)
31 distinct values 4402 (100.0%) 0 (0.0%)
5 LoE [integer]
Mean (sd) : 180 (79.3)
min ≤ med ≤ max:
24 ≤ 180 ≤ 350
IQR (CV) : 118 (0.4)
44 distinct values 4402 (100.0%) 0 (0.0%)
6 nonword [factor]
1. faku
2. fal
3. fapul
4. fapus
5. fikapul
6. fikupla
7. fikuspa
8. filpa
9. fips
10. fiska
[ 61 others ]
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
62(1.4%)
3782(85.9%)
4402 (100.0%) 0 (0.0%)
7 V [factor]
1. 1
2. 2
3. 3
1612(36.6%)
1550(35.2%)
1240(28.2%)
4402 (100.0%) 0 (0.0%)
8 coda_l [factor]
1. 0
2. 1
4154(94.4%)
248(5.6%)
4402 (100.0%) 0 (0.0%)
9 branching_onset [factor]
1. 0
2. 1
3. 2
2604(59.2%)
1674(38.0%)
124(2.8%)
4402 (100.0%) 0 (0.0%)
10 final_l [factor]
1. 0
2. 1
3968(90.1%)
434(9.9%)
4402 (100.0%) 0 (0.0%)
11 repetition [factor]
1. 0
2. 1
948(21.5%)
3454(78.5%)
4402 (100.0%) 0 (0.0%)
12 rec_voc [integer]
Mean (sd) : 19.7 (6.4)
min ≤ med ≤ max:
5 ≤ 20 ≤ 31
IQR (CV) : 10 (0.3)
23 distinct values 4402 (100.0%) 0 (0.0%)
13 verbal_stm [integer]
Mean (sd) : 3.8 (0.7)
min ≤ med ≤ max:
2 ≤ 4 ≤ 6
IQR (CV) : 1 (0.2)
2:142(3.2%)
3:1136(25.8%)
4:2485(56.5%)
5:568(12.9%)
6:71(1.6%)
4402 (100.0%) 0 (0.0%)
14 rec_voc_zscore [numeric]
Mean (sd) : -13.8 (6.1)
min ≤ med ≤ max:
-30.8 ≤ -13.4 ≤ -3.6
IQR (CV) : 8.3 (-0.4)
38 distinct values 4402 (100.0%) 0 (0.0%)
15 phono_mem_zscore [numeric]
Mean (sd) : -1.2 (0.9)
min ≤ med ≤ max:
-3.3 ≤ -1 ≤ 1.5
IQR (CV) : 1.2 (-0.8)
11 distinct values 4402 (100.0%) 0 (0.0%)
16 phono_awareness [numeric]
Mean (sd) : -2.8 (1.5)
min ≤ med ≤ max:
-4 ≤ -3.9 ≤ 0.6
IQR (CV) : 1.7 (-0.5)
17 distinct values 4047 (91.9%) 355 (8.1%)
17 occurrence_l [factor]
1. coda
2. final
3. other
248(5.6%)
434(9.9%)
3720(84.5%)
4402 (100.0%) 0 (0.0%)
18 L1_syll_complexity [integer]
Mean (sd) : 5.7 (1.1)
min ≤ med ≤ max:
4 ≤ 6 ≤ 8
IQR (CV) : 2 (0.2)
4:852(19.7%)
5:923(21.3%)
6:1420(32.8%)
7:994(23.0%)
8:142(3.3%)
4331 (98.4%) 71 (1.6%)

4 First figures and statistical analyses

4.1 Boxplot and preliminary analysis of children’s percentage of correct repetition

# Boxplot of the overall per-child percentage of correct repetition:
# box + mean marker (cross) + jittered individual points.
df_add_data %>%
  mutate(group = "") %>%
  ggplot(aes(x = group, y = NWR_pc_Total)) +
  geom_boxplot(outlier.size = 3, fill = "#A4A4A4") +
  stat_summary(fun = mean, geom = "point", shape = 4, size = 4, stroke = 2) +
  geom_jitter(shape = 16, size = 1, position = position_jitter(0.075)) +
  theme_classic() +
  labs(x = "", y = "Percentage of correct repetition") +
  scale_y_continuous(labels = scales::percent, breaks = seq(0, 1, 0.05))

What is the mean performance?

# Mean per-child percentage of correct repetition.
mean(df_add_data$NWR_pc_Total)
[1] 0.7846433

What are the lowest and highest performances?

# Highest per-child percentage of correct repetition.
max(df_add_data$NWR_pc_Total)
[1] 0.9577465
# Lowest per-child percentage of correct repetition.
min(df_add_data$NWR_pc_Total)
[1] 0.4366197

The overall performance on the phonological task is quite high, but the figure also shows a lot of individual variation, with scores from 44% to 96%.

How many children are above 80% of target repetition?

# Number of children scoring above 80% of correct repetitions.
nrow(filter(df_add_data, NWR_pc_Total > 0.8))
[1] 37

60% of the children (37/62) performed above 80% of target repetition, which is the average performance of typically developing French monolingual children.

4.2 Boxplot for receptive vocabulary size (z score) (by subject)

# Boxplot of the by-subject receptive vocabulary z-score.
# rec_voc_zscore is constant within subject, so the per-subject mean simply
# collapses the 71 item rows of each child to one value.
df %>%
  group_by(subject) %>%
  summarise(rec_voc_zscore = mean(rec_voc_zscore)) %>%
  ungroup() %>%
  mutate(group = as.factor("participants")) %>%
  ggplot(aes(x = group, y = rec_voc_zscore)) +
  geom_boxplot(outlier.size = 3, fill = "#A4A4A4", coef = 1.5) +
  stat_summary(fun = mean, geom = "point", shape = 4, size = 4, stroke = 2) +
  geom_jitter(shape = 16, size = 1, position = position_jitter(0.075)) +
  theme_classic() +
  labs(x = "", y = "Receptive vocabulary size (z score)") +
  scale_y_continuous(breaks = seq(-30, 0, 5))

4.3 Boxplot and preliminary analysis of the by-subject percentage of correct repetition (by 5 types of syllable structures)

We first prepare the data for the figure:

# Long-format table: one row per (participant, structure) with the percentage
# of correct repetition for that structure.
df_struct <- df_add_data %>%
  dplyr::select(-PCC, -PVC, -NWR_pc_Total) %>%
  rename(`Coda /l/` = Coda_l_pc, `Branching onset` = Branching_onset_pc, `Final /l/` = Final_l_pc, `Obstruent coda` = codas_pc, `#sC cluster` = sC_pc) %>%
  # FIX: select the value columns by name (everything but Participant) rather
  # than by position (2:6); positional selection silently breaks if the column
  # order of additional_data.txt ever changes.
  pivot_longer(cols = !Participant, names_to = "struct", values_to = "pc_correct_rep") %>%
  mutate(struct = as.factor(struct),
         struct = fct_relevel(struct, "Branching onset", "Obstruent coda", "Coda /l/", "Final /l/", "#sC cluster"))

Then we draw the boxplot:

# Boxplot of per-child percentage of correct repetition, one box per
# syllable-structure type (greyscale fill, mean cross, jittered points).
df_struct %>%
  ggplot(aes(x = struct, y = pc_correct_rep, fill = struct)) +
  geom_boxplot(outlier.size = 3, show.legend = FALSE, colour = "black") +
  scale_fill_grey() +
  stat_summary(fun = mean, geom = "point", shape = 4, size = 4, stroke = 2, show.legend = FALSE) +
  geom_jitter(shape = 16, size = 1, position = position_jitter(0.075), show.legend = FALSE) +
  theme_classic() +
  labs(x = "", y = "Percentage of correct repetition") +
  scale_y_continuous(labels = scales::percent, breaks = seq(0, 1, 0.1))

What are the mean percentages of correct repetition for each structure?

# Mean percentage of correct repetition per structure type.
summarise(group_by(df_struct, struct), mean = mean(pc_correct_rep))
# A tibble: 5 × 2
  struct           mean
  <fct>           <dbl>
1 Branching onset 0.846
2 Obstruent coda  0.907
3 Coda /l/        0.754
4 Final /l/       0.726
5 #sC cluster     0.955

Average percentages of correct repetition are equal to 95.5%, 90.7%, 84.6%, 75.4% and 72.6% for #sC clusters, obstruent codas, branching onsets, coda /l/ and final /l/, respectively. These values are, however, not that informative given the strong heterogeneity and the ceilinged distributions: for instance, scores range from 0 to 100% for coda /l/ and final /l/, but from around 50% to 100% for branching onsets.

4.4 Comparison between the median values of the percentages of correct repetition for the 5 types of syllable structures

In this section and the following one, given the non-normality of the distribution of percentages of correct repetition in our 5 groups, we need to rely on non-parametric tests.

We apply a Kruskal-Wallis test to check whether there are differences in terms of median between the 5 groups:

# Non-parametric comparison of the 5 group medians.
kruskal.test(pc_correct_rep ~ struct, data = df_struct)

    Kruskal-Wallis rank sum test

data:  pc_correct_rep by struct
Kruskal-Wallis chi-squared = 59.564, df = 4, p-value = 0.000000000003582

We can clearly reject the null hypothesis of no difference between the medians.

What are the median performances for the different structures?

# Median percentage of correct repetition per structure type.
summarise(group_by(df_struct, struct), median = median(pc_correct_rep))
# A tibble: 5 × 2
  struct          median
  <fct>            <dbl>
1 Branching onset  0.855
2 Obstruent coda   0.938
3 Coda /l/         0.875
4 Final /l/        0.857
5 #sC cluster      1    

Medians for the five syllable structures confirm what could be seen with means: #sC clusters and obstruent codas lead to better performances than branching onsets, coda /l/ and final /l/, which have three close median percentages of correct repetition around 85-87%.

We run post-hoc tests to look at pair differences:

# Pairwise post-hoc comparisons (Dunn's test, Holm-adjusted p-values).
dunnTest(pc_correct_rep ~ struct, data = df_struct, method = "holm")
                         Comparison           Z            P.unadj             P.adj
1     #sC cluster - Branching onset  6.47529641 0.0000000000946260 0.000000000946260
2            #sC cluster - Coda /l/  5.06470605 0.0000004090309978 0.000003272247983
3        Branching onset - Coda /l/ -1.41059035 0.1583654377394939 0.475096313218482
4           #sC cluster - Final /l/  6.39404724 0.0000000001615515 0.000000001453963
5       Branching onset - Final /l/ -0.08124917 0.9352438002034137 0.935243800203414
6              Coda /l/ - Final /l/  1.32934119 0.1837354315793458 0.367470863158692
7      #sC cluster - Obstruent coda  3.01513033 0.0025686885456720 0.012843442728360
8  Branching onset - Obstruent coda -3.46016608 0.0005398423202431 0.003778896241702
9         Coda /l/ - Obstruent coda -2.04957573 0.0404058511140099 0.161623404456040
10       Final /l/ - Obstruent coda -3.37891691 0.0007277199779395 0.004366319867637

We observe clear differences between (the median of % of correct repetitions for) #sC clusters and the 4 other groups. We also find a significant difference between obstruent codas and branching onsets, as well as between obstruent codas and final /l/. There is no significant difference between obstruent codas and coda /l/, although there would be one without adjustment of the p-values (we could have a false negative due to the correction). We do not observe significant differences between branching onset, /l/ in internal coda position and /l/ in final position.

We can further compute the effect size for each pair comparison, more precisely a Glass rank biserial correlation coefficient with a 95% confidence interval.

We first define all the possible pairs of syllable structures:

# All 10 unordered pairs of the 5 syllable structures, generated with combn()
# instead of being written out by hand. The enumeration order of combn()
# matches the original hand-written list exactly.
struct_names <- c("Branching onset", "Obstruent coda", "Coda /l/", "Final /l/", "#sC cluster")
struct_pairs <- combn(struct_names, 2, simplify = FALSE)

We derive simple descriptions for these pairs:

# Human-readable label for each pair, e.g. "Branching onset - Obstruent coda".
# vapply() returns a character vector directly (no unlist() needed).
pairs_descriptions <- vapply(struct_pairs, paste, character(1), collapse = " - ")

We also derive reduced data frames for the different pairs:

# Restrict `df` to the two structures in `target_groups`, dropping the unused
# factor levels and reordering the remaining two to match `target_groups`.
compute_pair_df <- function(target_groups, df) {
  subset_df <- filter(df, struct %in% target_groups)
  mutate(subset_df,
         struct = droplevels(struct),
         struct = fct_relevel(struct, target_groups))
}

# One reduced data frame per structure pair.
df_for_pairs <- lapply(struct_pairs, function(p) compute_pair_df(p, df = df_struct))

We can now compute effect sizes, and assemble a table for the results:

# Glass rank biserial correlation (with bootstrap CI) for each pair.
# FIX: the original piped each data frame into wilcoxonRG() while also naming
# x and g; because `.` only appeared nested (.$pc_correct_rep), magrittr still
# inserted the data frame as an extra positional argument to wilcoxonRG().
# Calling the function directly avoids that accidental argument. Also TRUE
# instead of the shorthand T.
df_struct_eff_size <- lapply(df_for_pairs, function(d) {
  wilcoxonRG(x = d$pc_correct_rep, g = d$struct, ci = TRUE)
})


# Assemble the effect sizes and their confidence bounds into one tibble.
tibble(pair = pairs_descriptions,
       `Glass rank biserial correlation coefficient` = map_dbl(df_struct_eff_size, "rg"),
       lower.ci = map_dbl(df_struct_eff_size, "lower.ci"),
       upper.ci = map_dbl(df_struct_eff_size, "upper.ci"))
# A tibble: 10 × 4
   pair                             `Glass rank biserial correlation coefficient` lower.ci upper.ci
   <chr>                                                                    <dbl>    <dbl>    <dbl>
 1 Branching onset - Obstruent coda                                       -0.438   -0.618    -0.236
 2 Branching onset - Coda /l/                                             -0.0367  -0.286     0.187
 3 Branching onset - Final /l/                                             0.102   -0.118     0.325
 4 Branching onset - #sC cluster                                          -0.762   -0.896    -0.614
 5 Obstruent coda - Coda /l/                                               0.182   -0.0248    0.372
 6 Obstruent coda - Final /l/                                              0.346    0.15      0.528
 7 Obstruent coda - #sC cluster                                           -0.383   -0.532    -0.222
 8 Coda /l/ - Final /l/                                                    0.11    -0.0857    0.313
 9 Coda /l/ - #sC cluster                                                 -0.399   -0.553    -0.252
10 Final /l/ - #sC cluster                                                -0.536   -0.666    -0.389

The effect sizes are strong for the differences between #sC cluster and the four other structures, and also for the two pairs (obstruent coda - branching onset) and (obstruent coda - final /l/). The pairs for branching onset, coda /l/ and final /l/ are all associated with much smaller effect sizes.

Overall, the median values themselves, the statistical tests and the measures of effect size suggest three groups for the 5 structures: one for #sC clusters, one for obstruent codas, and one for branching onset, coda /l/ and final /l/.

4.5 Comparison between the variances of the percentages of correct repetition for the 5 types of syllable structures

We can assess the homogeneity of the variance for the five types of syllable structures with Fligner-Killeen (median) test (which is non-parametric, unlike Levene’s test):

# Non-parametric test of homogeneity of variances across the 5 groups.
fligner.test(pc_correct_rep ~ struct, data = df_struct)

    Fligner-Killeen test of homogeneity of variances

data:  pc_correct_rep by struct
Fligner-Killeen:med chi-squared = 108.58, df = 4, p-value < 0.00000000000000022

The test clearly rejects the null hypothesis of equal variances in the 5 groups.

What are the variances for the different structures?

# Variance of the percentage of correct repetition per structure type.
summarise(group_by(df_struct, struct), var = var(pc_correct_rep))
# A tibble: 5 × 2
  struct             var
  <fct>            <dbl>
1 Branching onset 0.0122
2 Obstruent coda  0.0137
3 Coda /l/        0.0953
4 Final /l/       0.0818
5 #sC cluster     0.0233

The 5 variances may be arranged into two groups: the variance is quite clearly higher for “Coda /l/” and “Final /l/” than for “#sC cluster”, “Obstruent coda” and “Branching onset”.

We can look at pair differences between the variances of the 5 groups:

# Fligner-Killeen test for each of the 10 structure pairs.
test_statistics <- lapply(df_for_pairs, function(d) {
  fligner.test(pc_correct_rep ~ struct, data = d)
})

# Tabulate the pairwise variance tests and Holm-adjust the p-values.
variance_pair_differences <- tibble(
  pair = pairs_descriptions,
  statistic = map_dbl(test_statistics, "statistic"),
  p_values = map_dbl(test_statistics, "p.value")
) %>%
  mutate(adjusted_p_values = p.adjust(p_values, method = "holm"))

variance_pair_differences
# A tibble: 10 × 4
   pair                             statistic p_values adjusted_p_values
   <chr>                                <dbl>    <dbl>             <dbl>
 1 Branching onset - Obstruent coda     0.105 7.46e- 1          7.46e- 1
 2 Branching onset - Coda /l/          45.4   1.59e-11          1.28e-10
 3 Branching onset - Final /l/         25.0   5.81e- 7          2.32e- 6
 4 Branching onset - #sC cluster       32.5   1.18e- 8          7.06e- 8
 5 Obstruent coda - Coda /l/           45.8   1.29e-11          1.16e-10
 6 Obstruent coda - Final /l/          30.5   3.35e- 8          1.67e- 7
 7 Obstruent coda - #sC cluster        24.1   9.26e- 7          2.78e- 6
 8 Coda /l/ - Final /l/                 3.46  6.28e- 2          1.26e- 1
 9 Coda /l/ - #sC cluster              53.7   2.39e-13          2.39e-12
10 Final /l/ - #sC cluster             39.9   2.63e-10          1.84e- 9

All differences are significant except between “Coda /l/” and “Final /l/”, and between “Obstruent coda” and “Branching onset”.

4.6 Conclusions about the medians and variances of the 5 types of structure

Overall, obstruent codas and #sC clusters are little problematic for the children (higher medians and lower variances), while coda /l/ and final /l/ are more challenging for at least some of the children (lower medians and higher variances). Branching onsets fall in between with a lower median and a lower variance, but one may arguably group them with coda /l/ and final /l/ given their medians are not significantly different.

On this basis, in the next part of the analyses, we’re going to focus on the three more challenging structures: branching onsets, coda /l/, and final /l/.

5 Building a valid regression model

As previously said, our approach rests on a regression model, more specifically a mixed-effects logistic regression model where the dependent variable is whether a nonword was correctly repeated or not. We need to ensure we build a valid model, i.e., a model for which all the right assumptions are met and with the right random effects structure.

5.1 Preparing a ‘sub-dataset’ for the regression model

We have a few missing data for L1_syll_complexity and phono_awareness. Given these are variables of interest and we want to include them as predictors in our model, we need to extract a subset of observations.

We first drop a participant speaking Kabyle, given that we don’t have data about the syllabic complexity of this language:

# Drop the observations of the Kabyle-speaking participant, for whom no L1
# syllabic-complexity score is available.
df_reduced <- filter(df, !is.na(L1_syll_complexity))

Second, a few participants did not understand the task assessing their phonological awareness and therefore do not have any related score:

# List the participants with no phonological-awareness score (n = number of
# item-level rows per participant), grouped by L1.
df_reduced %>% filter(is.na(phono_awareness)) %>% group_by(L1, subject) %>% tally()
# A tibble: 5 × 3
# Groups:   L1 [3]
  L1       subject      n
  <fct>    <fct>    <int>
1 Albanian EANA-AL4    71
2 Japanese EANA-J1     71
3 Japanese EANA-J2     71
4 Romanian EANA-R1     71
5 Romanian EANA-R2     71

They consist in one Albanian, two Japanese and two Romanian participants. We drop these participants from our final dataset:

# Drop the participants without a phonological-awareness score.
df_reduced <- filter(df_reduced, !is.na(phono_awareness))

5.2 Defining the control parameters for the glmer model.

We first define a control structure for our glmer models:

# Shared optimization settings for all glmer fits: bobyqa optimizer with a
# generous evaluation budget (maxfun = 100000), derivative calculations kept
# on, and the gradient convergence check reported as a warning at tolerance
# 1e-3 (relTol disabled).
ctrl <- glmerControl(optimizer="bobyqa", optCtrl = list(maxfun=100000), calc.derivs = T, check.conv.grad = .makeCC("warning", tol = 1e-3, relTol = NULL))

5.3 Building and assessing a random intercept model

As a baseline for further investigation, we start with an intercept-only model which ‘implements’ our aforementioned predictors.

5.3.1 Computing the model

We define the model with the function glmer():

# Random-intercept mixed-effects logistic regression of repetition success.
# Continuous predictors are standardized with scale(); the six target
# two-way interactions listed in section 3.1 are included; random intercepts
# for subject, L1 and nonword capture the non-independence of observations
# (subject and L1 as crossed effects — see the note on nesting in section 3.1).
model_random_intercept <- glmer(repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + 
                                scale(rec_voc) + scale(verbal_stm) +
                                scale(phono_awareness) + L1_syll_complexity + 
                                occurrence_l:scale(LoE) + branching_onset:scale(LoE) + 
                                scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) +
                                scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) +
                                (1 | subject) +
                                (1 | L1) + 
                                (1 | nonword),
                                data = df_reduced, control = ctrl, family = binomial(link = "logit"))

We can confirm that the model is not singular:

isSingular(model_random_intercept)
[1] FALSE

This model converges without any problem but in order to trust its outputs, we need to make sure that the assumptions underlying a mixed-effects logistic regression are met. While these assumptions are not as clear as for linear regression, the following conditions matter:

  • the normality of the different distributions for the random effects
  • the correct distribution of the residuals of the model
  • the absence of problematic multicollinearity

5.3.2 Normality of the levels of the random effects

# plot_model(type = "diag") returns one quantile-quantile plot of the estimated
# random effects per grouping factor; marked deviations from the reference line
# would indicate non-normal random-effect distributions.
p <- plot_model(model_random_intercept, type = "diag", show.values = TRUE, value.offset = .3)
p$nonword + labs(title = "Random intercepts for nonword")

p$L1 + labs(title = "Random intercepts for L1")

p$subject + labs(title = "Random intercepts for subject") + theme_sjplot()

The quantile-quantile plots of the various random effects indicate that their levels follow a distribution which is quite close to a Gaussian distribution.

5.3.3 Residuals of the model

There is no expected normality of the model residuals with a logistic regression, but we can simulate residuals with the function simulateResiduals() of the DHARMa package. According to the authors of the package: ‘Even experienced statistical analysts currently have few options to diagnose misspecification problems in GLM. DHARMa package aims at solving these problems by creating readily interpretable residuals for generalized linear models that are standardized to values between 0 and 1, and that can be interpreted as intuitively as residuals for the linear model. This is achieved by a simulation-based approach that transforms the residuals to a standardized scale. The key idea is that, if the model is correctly specified, then the observed data should look like as if it was created from the fitted model. Hence, for a correctly specified model, all values of the cumulative distribution should appear with equal probability. That means we expect the distribution of the residuals to be flat, regardless of the binomial model structure.’ — Source: DHARMa: residual diagnostics for hierarchical (multi-level/mixed) regression models.

# Simulate 1000 scaled (quantile) residuals from the fitted model.
sim.residuals <- simulateResiduals(fittedModel = model_random_intercept, n = 1000, refit = FALSE)
# plotSimulatedResiduals() is deprecated in recent DHARMa versions; the plot()
# method for DHARMa objects produces the same QQ-plot and
# residual-vs-predicted panel.
plot(sim.residuals)

According to the graphics, the simulated residuals do not reveal any misspecification of the model - they are uniformly and homogeneously distributed with respect to the model predictions (right graphic), and they also appear to be normally distributed.

We can also inspect the residuals not with respect to the model predictions, but to the different main fixed effects.

# Simulated residuals against each categorical nonword predictor;
# plotResiduals() tests uniformity of the residuals within each level.
plotResiduals(sim.residuals, form = df_reduced$occurrence_l, xlab = "Occurrence of l", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$branching_onset, xlab = "branching_onset", ylab = "Simulated Standardized Residuals")

# asFactor = FALSE spelled out (F is a reassignable variable in R).
plotResiduals(sim.residuals, form = df_reduced$V, xlab = "V", ylab = "Simulated Standardized Residuals", asFactor = FALSE)

We do not see any violation of the uniformity and homogeneity of the distribution of the simulated residuals for our different categorical predictors.

# Simulated residuals against each participant-level numerical predictor;
# plotResiduals() tests uniformity and homogeneity along each covariate.
plotResiduals(sim.residuals, form = df_reduced$rec_voc, xlab = "rec_voc", ylab = "Simulated Standardized Residuals")

# asFactor = FALSE spelled out (F is a reassignable variable in R).
plotResiduals(sim.residuals, form = df_reduced$verbal_stm, xlab = "verbal_stm", ylab = "Simulated Standardized Residuals", asFactor = FALSE)

plotResiduals(sim.residuals, form = df_reduced$LoE, xlab = "LoE", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$age, xlab = "age", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$L1_syll_complexity, asFactor = FALSE, xlab = "Syllabic complexity of the L1", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$phono_awareness, xlab = "Phonological awareness", ylab = "Simulated Standardized Residuals")

We also do not see any violation of the uniformity and homogeneity of the distribution of the simulated residuals for our different numerical predictors.

5.3.4 Multicollinearity

We finally need to check whether there is multicollinearity in the model, i.e., whether the predictive effect of a fixed predictor is not too close to the predictive effect of a linear combination of other fixed predictors. We use the function check_collinearity() from the performance package, as it can be applied to mixed-effects logistic regressions:

check_collinearity(model_random_intercept)
# Check for Multicollinearity

Low Correlation

                             Term  VIF   VIF 95% CI Increased SE Tolerance Tolerance 95% CI
                     occurrence_l 1.11 [1.08, 1.15]         1.05      0.90     [0.87, 0.93]
                  branching_onset 1.22 [1.18, 1.26]         1.10      0.82     [0.79, 0.85]
                                V 1.18 [1.14, 1.22]         1.09      0.85     [0.82, 0.88]
                       scale(LoE) 4.68 [4.43, 4.95]         2.16      0.21     [0.20, 0.23]
                       scale(age) 1.78 [1.70, 1.86]         1.33      0.56     [0.54, 0.59]
                   scale(rec_voc) 1.84 [1.77, 1.93]         1.36      0.54     [0.52, 0.57]
                scale(verbal_stm) 1.27 [1.22, 1.32]         1.12      0.79     [0.76, 0.82]
           scale(phono_awareness) 1.55 [1.49, 1.62]         1.24      0.65     [0.62, 0.67]
               L1_syll_complexity 1.74 [1.66, 1.82]         1.32      0.58     [0.55, 0.60]
          occurrence_l:scale(LoE) 4.78 [4.53, 5.06]         2.19      0.21     [0.20, 0.22]
       branching_onset:scale(LoE) 1.44 [1.38, 1.50]         1.20      0.70     [0.67, 0.72]
        scale(LoE):scale(rec_voc) 1.42 [1.37, 1.48]         1.19      0.70     [0.68, 0.73]
     scale(age):scale(verbal_stm) 1.17 [1.14, 1.22]         1.08      0.85     [0.82, 0.88]
        scale(age):scale(rec_voc) 1.21 [1.17, 1.26]         1.10      0.82     [0.79, 0.85]
 scale(rec_voc):scale(verbal_stm) 1.70 [1.63, 1.78]         1.31      0.59     [0.56, 0.61]

None of the fixed effects has a VIF higher than 5. Two terms, LoE and occurrence_l:LoE have a VIF close to 5, but it is actually not unexpected that multicollinearity appears with interaction terms in a model. Overall, we do not have any obvious problem of multicollinearity.

5.3.5 Conclusion

We did not find any issue with our model. It therefore appears to be valid and a good first candidate to analyse the effect of the various predictors. Better random effects structures should, however, be considered.

5.4 Rationale for finding a good random effects structure

As explained by Barr et al. (2013), a random-effects model with intercepts only might lead to anti-conservative inference, i.e., might present an inflated rate of Type-I errors (false positives). The solution put forward by these authors is to consider, instead of only random intercepts, a maximal random-effects structure with all relevant random slopes given the fixed-effects predictors. Bates et al. (2015) pointed out, however, that this strategy is vulnerable to the lack of data to fit complex models (i.e., when there is not sufficient variance to sustain the model). They therefore suggest a method of gradual simplification of the random-effects structure.

In our case, initial investigations clearly show that considering large random-effects structures leads to singular models - these models are over-parameterized given the number of observations. Models with large random effects structures are also costly to compute, taking hours and hours, because of all the variances and covariances to estimate. The gradual simplification of a maximal structure is thus tedious, with no interesting results until only very few random slopes are left.

What we can do, instead of a step-down approach, is thus to consider a step-up approach, which starts from our solid candidate model and considers an ‘ascending’ trajectory to include random slopes. We can consider all possible candidate models with a single random slope, choose the best one, and then see whether a second slope can be added etc. To assess our models, we can pay attention to the random-effects structure, their parsimony with the Akaike Information Criterion (AIC), and fit to the data with ANOVA Chi² tests to compare models.

(We have actually performed both the step-up and step-down approaches, and they lead to the same conclusion.)

5.5 Step-up approach for the inclusion of random slopes

5.5.1 Finding the best model

Starting from our intercept-only model, we compute the ten possible models with a single additional random slope (leaving aside interaction terms as single random slopes):

For the sake of simplicity, we first define a string of characters containing the first part of the formulas, which will be prepended to other strings describing the various random-effects structures:

# Shared fixed-effects part of every candidate formula; each random-effects
# specification below is pasted after this string before fitting.
formula_beginning <-
  "repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) +
                scale(phono_awareness) + L1_syll_complexity + 
                occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) +
                scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + "
# Candidate random-effects structures, each adding exactly one random slope to
# the baseline: "0 + slope | group" gives an intercept-free slope by
# subject/L1, "1 + slope | nonword" gives a correlated intercept-plus-slope by
# nonword.
# NOTE(review): the name rs_bo_subj_L1 appears to denote the by-L1 slope model
# (cf. rs_occ_l_L1); confirm the naming convention is intentional.
formulas <- c(
  rs_occ_l_subj = "(0 + occurrence_l | subject) + (1 | L1) + (1 | nonword)",
  rs_occ_l_L1 = "(1 | subject) + (0 + occurrence_l | L1) + (1 | nonword)",
  rs_bo_subj = "(0 + branching_onset | subject) + (1 | L1) + (1 | nonword)",
  rs_bo_subj_L1 = "(1 | subject) + (0 + branching_onset | L1) + (1 | nonword)",
  rs_V_subj = "(0 + V | subject) + (1 | L1) + (1 | nonword)",
  rs_V_L1 = "(1 | subject) + (0 + V | L1) + (1 | nonword)",
  rs_LoE = "(1 | subject) + (1 | L1) + (1 + scale(LoE) | nonword)",
  rs_age = "(1 | subject) + (1 | L1) + (1 + scale(age) | nonword)",
  rs_rec_voc = "(1 | subject) + (1 | L1) + (1 + scale(rec_voc) | nonword)",
  rs_verbal_stm = "(1 | subject) + (1 | L1) + (1 + scale(verbal_stm) | nonword)",
  rs_phono_awareness = "(1 | subject) + (1 | L1) + (1 + scale(phono_awareness) | nonword)",
  rs_L1_syll_complexity = "(1 | subject) + (1 | L1) + (1 + L1_syll_complexity | nonword)"
)

# Prepend the shared fixed-effects specification to every random-effects
# structure while preserving the descriptive model names.
formulas <- setNames(paste(formula_beginning, formulas), names(formulas))
# Reload previously fitted models from disk (companion file to the fitting
# chunk below, so knitr runs can skip refitting).
load(file = "models_one_random_slope.Rdata")

We compute all the models one by one:

# Fit one glmer per candidate formula; setNames() keeps each model labelled
# with the name of its random-effects specification.
models_one_random_slope <- setNames(
  lapply(formulas, function(f) {
    glmer(as.formula(f), data = df_reduced, control = ctrl,
          family = binomial(link = "logit"))
  }),
  names(formulas)
)
# Persist the fitted models so later runs can reload instead of refitting.
save(models_one_random_slope, file = "D:/models_one_random_slope.Rdata")

We assess the singularity of the models:

tibble(rs = names(formulas), singular = lapply(models_one_random_slope, isSingular) %>% unlist())
# A tibble: 12 × 2
   rs                    singular
   <chr>                 <lgl>   
 1 rs_occ_l_subj         FALSE   
 2 rs_occ_l_L1           TRUE    
 3 rs_bo_subj            FALSE   
 4 rs_bo_subj_L1         TRUE    
 5 rs_V_subj             FALSE   
 6 rs_V_L1               TRUE    
 7 rs_LoE                TRUE    
 8 rs_age                TRUE    
 9 rs_rec_voc            FALSE   
10 rs_verbal_stm         FALSE   
11 rs_phono_awareness    FALSE   
12 rs_L1_syll_complexity FALSE   

Models rs_occ_l_L1, rs_bo_subj_L1, rs_V_L1, rs_LoE, and rs_age are singular and should not be considered further. The other models, however, are not, and we can compare them to our baseline, random intercepts model with ANOVAs:

anova(model_random_intercept, models_one_random_slope$rs_occ_l_subj)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_occ_l_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 | nonword)
                                      npar    AIC    BIC logLik deviance  Chisq Df         Pr(>Chisq)    
model_random_intercept                  24 3616.1 3767.0  -1784   3568.1                                 
models_one_random_slope$rs_occ_l_subj   29 3556.1 3738.4  -1749   3498.1 70.022  5 0.0000000000001014 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(model_random_intercept, models_one_random_slope$rs_bo_subj)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_bo_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + branching_onset | subject) + (1 | L1) + (1 | nonword)
                                   npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
model_random_intercept               24 3616.1 3767.0 -1784.0   3568.1                     
models_one_random_slope$rs_bo_subj   29 3622.3 3804.7 -1782.2   3564.3 3.7904  5       0.58
anova(model_random_intercept, models_one_random_slope$rs_V_subj)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_V_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + V | subject) + (1 | L1) + (1 | nonword)
                                  npar    AIC  BIC  logLik deviance  Chisq Df Pr(>Chisq)
model_random_intercept              24 3616.1 3767 -1784.0   3568.1                     
models_one_random_slope$rs_V_subj   29 3618.7 3801 -1780.3   3560.7 7.4444  5     0.1896
anova(model_random_intercept, models_one_random_slope$rs_rec_voc)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_rec_voc: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 + scale(rec_voc) | nonword)
                                   npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
model_random_intercept               24 3616.1 3767.0 -1784.0   3568.1                     
models_one_random_slope$rs_rec_voc   26 3616.9 3780.3 -1782.4   3564.9 3.2458  2     0.1973
anova(model_random_intercept, models_one_random_slope$rs_verbal_stm)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_verbal_stm: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 + scale(verbal_stm) | nonword)
                                      npar    AIC    BIC logLik deviance Chisq Df Pr(>Chisq)
model_random_intercept                  24 3616.1 3767.0  -1784   3568.1                    
models_one_random_slope$rs_verbal_stm   26 3620.0 3783.5  -1784   3568.0 0.094  2     0.9541
anova(model_random_intercept, models_one_random_slope$rs_phono_awareness)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_phono_awareness: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 + scale(phono_awareness) | nonword)
                                           npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
model_random_intercept                       24 3616.1 3767.0 -1784.0   3568.1                     
models_one_random_slope$rs_phono_awareness   26 3615.9 3779.4 -1781.9   3563.9 4.2133  2     0.1216
anova(model_random_intercept, models_one_random_slope$rs_L1_syll_complexity)
Data: df_reduced
Models:
model_random_intercept: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 | nonword)
models_one_random_slope$rs_L1_syll_complexity: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (1 | subject) + (1 | L1) + (1 + L1_syll_complexity | nonword)
                                              npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
model_random_intercept                          24 3616.1 3767.0 -1784.0   3568.1                     
models_one_random_slope$rs_L1_syll_complexity   26 3619.0 3782.4 -1783.5   3567.0 1.1408  2     0.5653

Of all the models with a single random slope, only the ones with occurrence_l as a random slope, either for subject or for L1, have a significantly lower deviance. Their AICs are also much lower despite the increase in the number of parameters. occurrence_l as a random slope for subject leads, however, to the better model. We can select this new model and consider a second random slope:

# Candidate structures adding one more random slope on top of the retained
# occurrence_l-by-subject slope from the previous step.
# NOTE(review): in second_rs_bo_subj, "(0 + occurrence_l + 0 + branching_onset
# | subject)" repeats "0 +"; the second occurrence is presumably redundant —
# confirm the intended specification.
formulas <- c(
  second_rs_occ_l_L1 = "(0 + occurrence_l | subject) + (0 + occurrence_l | L1) + (1 | nonword)",
  second_rs_bo_subj = "(0 + occurrence_l + 0 + branching_onset | subject) + (1 | L1) + (1 | nonword)",
  second_rs_bo_subj_L1 = "(0 + occurrence_l | subject) + (0 + branching_onset | L1) + (1 | nonword)",
  second_rs_V_subj = "(0 + occurrence_l + V | subject) + (1 | L1) + (1 | nonword)",
  second_rs_V_L1 = "(0 + occurrence_l | subject) + (0 + V | L1) + (1 | nonword)",
  second_rs_LoE = "(0 + occurrence_l | subject) + (1 | L1) + (1 + scale(LoE) | nonword)",
  second_rs_age = "(0 + occurrence_l | subject) + (1 | L1) + (1 + scale(age) | nonword)",
  second_rs_rec_voc = "(0 + occurrence_l | subject) + (1 | L1) + (1 + scale(rec_voc) | nonword)",
  second_rs_verbal_stm = "(0 + occurrence_l | subject) + (1 | L1) + (1 + scale(verbal_stm) | nonword)",
  second_rs_phono_awareness = "(0 + occurrence_l | subject) + (1 | L1) + (1 + scale(phono_awareness) | nonword)",
  second_rs_L1_syll_complexity = "(0 + occurrence_l | subject) + (1 | L1) + (1 + L1_syll_complexity | nonword)")
 
# Prepend the shared fixed-effects specification to every random-effects
# structure while preserving the descriptive model names.
formulas <- setNames(paste(formula_beginning, formulas), names(formulas))
# Reload previously fitted models from disk (companion file to the fitting
# chunk below, so knitr runs can skip refitting).
load(file = "models_two_random_slopes.Rdata")

We compute all the models:

# Fit one glmer per two-slope candidate formula; setNames() keeps each model
# labelled with the name of its random-effects specification.
models_two_random_slopes <- setNames(
  lapply(formulas, function(f) {
    glmer(as.formula(f), data = df_reduced, control = ctrl,
          family = binomial(link = "logit"))
  }),
  names(formulas)
)
# Persist the fitted models so later runs can reload instead of refitting.
save(models_two_random_slopes, file = "D:/models_two_random_slopes.Rdata")

We assess the singularity of the models:

tibble(rs = names(formulas), singular = lapply(models_two_random_slopes, isSingular) %>% unlist())
# A tibble: 11 × 2
   rs                           singular
   <chr>                        <lgl>   
 1 second_rs_occ_l_L1           TRUE    
 2 second_rs_bo_subj            TRUE    
 3 second_rs_bo_subj_L1         TRUE    
 4 second_rs_V_subj             TRUE    
 5 second_rs_V_L1               TRUE    
 6 second_rs_LoE                TRUE    
 7 second_rs_age                TRUE    
 8 second_rs_rec_voc            FALSE   
 9 second_rs_verbal_stm         FALSE   
10 second_rs_phono_awareness    FALSE   
11 second_rs_L1_syll_complexity FALSE   

Models second_rs_occ_l_L1, second_rs_bo_subj, second_rs_bo_subj_L1, second_rs_V_subj, second_rs_V_L1, second_rs_LoE, and second_rs_age are singular and should not be considered further. The other models, however, are not, and we can compare them to our model with occurrence_l as random slope for subject:

anova(models_one_random_slope$rs_occ_l_subj, models_two_random_slopes$second_rs_rec_voc)
Data: df_reduced
Models:
models_one_random_slope$rs_occ_l_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 | nonword)
models_two_random_slopes$second_rs_rec_voc: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 + scale(rec_voc) | nonword)
                                           npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
models_one_random_slope$rs_occ_l_subj        29 3556.1 3738.4 -1749.0   3498.1                     
models_two_random_slopes$second_rs_rec_voc   31 3555.6 3750.5 -1746.8   3493.6 4.4699  2      0.107
anova(models_one_random_slope$rs_occ_l_subj, models_two_random_slopes$second_rs_verbal_stm)
Data: df_reduced
Models:
models_one_random_slope$rs_occ_l_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 | nonword)
models_two_random_slopes$second_rs_verbal_stm: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 + scale(verbal_stm) | nonword)
                                              npar    AIC    BIC logLik deviance  Chisq Df Pr(>Chisq)
models_one_random_slope$rs_occ_l_subj           29 3556.1 3738.4  -1749   3498.1                     
models_two_random_slopes$second_rs_verbal_stm   31 3560.0 3755.0  -1749   3498.0 0.0466  2      0.977
anova(models_one_random_slope$rs_occ_l_subj, models_two_random_slopes$second_rs_phono_awareness)
Data: df_reduced
Models:
models_one_random_slope$rs_occ_l_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 | nonword)
models_two_random_slopes$second_rs_phono_awareness: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 + scale(phono_awareness) | nonword)
                                                   npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)
models_one_random_slope$rs_occ_l_subj                29 3556.1 3738.4 -1749.0   3498.1                    
models_two_random_slopes$second_rs_phono_awareness   31 3557.1 3752.0 -1747.5   3495.1 2.969  2     0.2266
anova(models_one_random_slope$rs_occ_l_subj, models_two_random_slopes$second_rs_L1_syll_complexity)
Data: df_reduced
Models:
models_one_random_slope$rs_occ_l_subj: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 | nonword)
models_two_random_slopes$second_rs_L1_syll_complexity: repetition ~ occurrence_l + branching_onset + V + scale(LoE) + scale(age) + scale(rec_voc) + scale(verbal_stm) + scale(phono_awareness) + L1_syll_complexity + occurrence_l:scale(LoE) + branching_onset:scale(LoE) + scale(rec_voc):scale(LoE) + scale(age):scale(verbal_stm) + scale(age):scale(rec_voc) + scale(rec_voc):scale(verbal_stm) + (0 + occurrence_l | subject) + (1 | L1) + (1 + L1_syll_complexity | nonword)
                                                      npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
models_one_random_slope$rs_occ_l_subj                   29 3556.1 3738.4 -1749.0   3498.1                     
models_two_random_slopes$second_rs_L1_syll_complexity   31 3559.1 3754.1 -1748.6   3497.1 0.9456  2     0.6233

None of the four models second_rs_rec_voc, second_rs_verbal_stm, second_rs_phono_awareness, second_rs_L1_syll_complexity predicts the data significantly better, despite the added number of degrees of freedom. This suggests to us that there is no point further adding random slopes (all the possible candidates will lead to singularity given what we have observed).

We therefore end up with a model with a single random slope, which performs better than our random-intercept model. We need, however, to assess that it meets the required assumptions.

We rename our model for ease of manipulation:

model_rep <- models_one_random_slope$rs_occ_l_subj

5.5.2 Normality of the levels of the random effects

# Diagnostic QQ-plots of the estimated random effects for the retained model;
# deviations from the reference line would indicate non-normal distributions.
p <- plot_model(model_rep, type = "diag", show.values = TRUE, value.offset = .3)
p$nonword + labs(title = "Random intercepts for nonword")

p$L1 + labs(title = "Random intercepts for L1")

p$subject + labs(title = "Random slopes for subject", subtitle = "occurrence_lcoda: l in internal coda position -- occurrence_lfinal: l in final position -- occurrence_lother: other nonwords") + theme_sjplot()

The quantile-quantile plots of the various random effects indicate that their levels follow a distribution which is quite close to a Gaussian distribution.

5.5.3 Residuals of the model

We simulate residuals with simulateResiduals():

# Simulate 1000 scaled (quantile) residuals from the retained model.
sim.residuals <- simulateResiduals(fittedModel = model_rep, n = 1000, refit = FALSE)
# plotSimulatedResiduals() is deprecated in recent DHARMa versions; the plot()
# method for DHARMa objects produces the same QQ-plot and
# residual-vs-predicted panel.
plot(sim.residuals)

As previously, the simulated residuals do not reveal any misspecification of the model.

We further inspect the residuals with respect to the different main fixed effects:

# Simulated residuals against each categorical nonword predictor;
# plotResiduals() tests uniformity of the residuals within each level.
plotResiduals(sim.residuals, form = df_reduced$occurrence_l, xlab = "Occurrence of l", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$branching_onset, xlab = "branching_onset", ylab = "Simulated Standardized Residuals")

# asFactor = FALSE spelled out (F is a reassignable variable in R).
plotResiduals(sim.residuals, form = df_reduced$V, xlab = "V", ylab = "Simulated Standardized Residuals", asFactor = FALSE)

We do not see any violation of the uniformity and homogeneity of the distribution of the simulated residuals for our different categorical predictors.

# Simulated residuals against each participant-level numerical predictor;
# plotResiduals() tests uniformity and homogeneity along each covariate.
plotResiduals(sim.residuals, form = df_reduced$rec_voc, xlab = "rec_voc", ylab = "Simulated Standardized Residuals")

# asFactor = FALSE spelled out (F is a reassignable variable in R).
plotResiduals(sim.residuals, form = df_reduced$verbal_stm, xlab = "verbal_stm", ylab = "Simulated Standardized Residuals", asFactor = FALSE)

plotResiduals(sim.residuals, form = df_reduced$LoE, xlab = "LoE", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$age, xlab = "age", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$L1_syll_complexity, asFactor = FALSE, xlab = "Syllabic complexity of the L1", ylab = "Simulated Standardized Residuals")

plotResiduals(sim.residuals, form = df_reduced$phono_awareness, xlab = "Phonological awareness", ylab = "Simulated Standardized Residuals")

We also do not see any violation of the uniformity and homogeneity of the distribution of the simulated residuals for our different numerical predictors.

5.5.4 Multicollinearity

We use the function check_collinearity():

check_collinearity(model_rep)
# Check for Multicollinearity

Low Correlation

                             Term  VIF     VIF 95% CI Increased SE Tolerance Tolerance 95% CI
                     occurrence_l 1.09 [ 1.06,  1.13]         1.04      0.92     [0.88, 0.95]
                  branching_onset 1.19 [ 1.15,  1.24]         1.09      0.84     [0.81, 0.87]
                                V 1.16 [ 1.13,  1.21]         1.08      0.86     [0.83, 0.89]
                       scale(age) 1.87 [ 1.78,  1.95]         1.37      0.54     [0.51, 0.56]
                   scale(rec_voc) 2.01 [ 1.92,  2.11]         1.42      0.50     [0.47, 0.52]
                scale(verbal_stm) 1.23 [ 1.18,  1.28]         1.11      0.82     [0.78, 0.84]
           scale(phono_awareness) 1.43 [ 1.38,  1.49]         1.20      0.70     [0.67, 0.73]
               L1_syll_complexity 1.80 [ 1.72,  1.88]         1.34      0.56     [0.53, 0.58]
       branching_onset:scale(LoE) 1.56 [ 1.50,  1.63]         1.25      0.64     [0.61, 0.67]
        scale(LoE):scale(rec_voc) 1.47 [ 1.41,  1.53]         1.21      0.68     [0.65, 0.71]
     scale(age):scale(verbal_stm) 1.21 [ 1.17,  1.26]         1.10      0.82     [0.79, 0.85]
        scale(age):scale(rec_voc) 1.26 [ 1.21,  1.31]         1.12      0.80     [0.76, 0.82]
 scale(rec_voc):scale(verbal_stm) 1.89 [ 1.80,  1.98]         1.37      0.53     [0.51, 0.55]

High Correlation

                    Term   VIF     VIF 95% CI Increased SE Tolerance Tolerance 95% CI
              scale(LoE) 11.94 [11.26, 12.67]         3.46      0.08     [0.08, 0.09]
 occurrence_l:scale(LoE) 12.09 [11.40, 12.83]         3.48      0.08     [0.08, 0.09]

This time we find two fixed effects with a VIF higher than 10 - LoE and occurrence_l:LoE. As previously, it is not unexpected that multicollinearity appears with interaction terms in a model, and there may be a possible inflation of VIF with the interaction terms. This is difficult to assess, though (some techniques are available, but only for models without random effects). We can say, nevertheless, that the issue, if there is one, will be limited to these two fixed effects - there is no spill-over into the other predictors; more specifically, the estimates won’t be biased, but inflated standard errors (by around 3.45-3.48) may lead to false negatives.

5.5.5 Conclusion

We do not find any issue with our model beyond the possibility of false negatives for two fixed effects. We consider it a sufficiently solid choice for our analysis, and will attempt to further assess the role of LoE and occurrence_l:LoE with the random-intercepts-only model.

6 Assessing our hypotheses

6.1 Analyzing the fixed effects

In the next paragraphs, one should note that the graphical representations report probabilities of correct repetition of a nonword, i.e., in the ‘numerical space’ of the predicted variable. However, the statistical tests of the various effects consider the non-transformed estimates and confidence intervals, i.e. in the numerical space of the linear combination of predictors / independent variables.

We rely on the emmeans package and the computation of estimated marginal means of either levels of factors or linear trends. To adjust the p-values in the case of multiple tests, we rely on a multivariate t distribution (adjust = “mvt”) with the same covariance structure as the estimates, as it is the closest to an exact adjustment (see https://cran.r-project.org/web/packages/emmeans/vignettes/confidence-intervals.html).

6.1.1 Investigating the interactions

We can first investigate the higher-level predictors of the model, that is the different interactions.

  • occurrence_l * LoE
  • branching_onset * LoE
  • rec_voc * LoE
  • age * verbal_stm
  • age * rec_voc
  • verbal_stm * rec_voc

The first two interactions are between one categorical variable and the continuous variable LoE (occurrence_l : LoE and branching_onset : LoE)

We first define a range of variation for the values of LoE

list.LoE <- list(LoE = seq(min(df_reduced$LoE), max(df_reduced$LoE), by = 1))

For occurrence_l : LoE:

plot_model(model_rep, type = "emm", terms = c("LoE [all]", "occurrence_l"))

emtrends(model_rep, pairwise ~ occurrence_l | LoE, var= "LoE", adjust = "mvt", infer = c(T,T))
$emtrends
LoE = 184:
 occurrence_l  LoE.trend      SE  df asymp.LCL asymp.UCL z.ratio p.value
 coda         0.00119876 0.00343 Inf  -0.00553   0.00793   0.349  0.7270
 final        0.00352791 0.00335 Inf  -0.00303   0.01008   1.055  0.2916
 other        0.00000632 0.00145 Inf  -0.00284   0.00285   0.004  0.9965

Results are averaged over the levels of: branching_onset, V 
Confidence level used: 0.95 

$contrasts
LoE = 184:
 contrast      estimate      SE  df asymp.LCL asymp.UCL z.ratio p.value
 coda - final  -0.00233 0.00439 Inf  -0.01251   0.00785  -0.531  0.8503
 coda - other   0.00119 0.00285 Inf  -0.00541   0.00780   0.419  0.9039
 final - other  0.00352 0.00348 Inf  -0.00456   0.01160   1.011  0.5572

Results are averaged over the levels of: branching_onset, V 
Confidence level used: 0.95 
Conf-level adjustment: mvt method for 3 estimates 
P value adjustment: mvt method for 3 tests 

The figures show that the slopes for the levels other and coda are quite close, and are visually quite different from the slope for the third level final. The statistical tests, however, fail to detect a significant difference. The reason for that is possibly the large standard errors for coda and final, which are due in turn to the small number of nonwords which relate to these two levels (4 and 7 respectively, to be compared to 60 nonwords without l in coda or final position):

# Count the number of distinct nonwords associated with each level of occurrence_l
distinct_items <- df_reduced %>%
  dplyr::distinct(nonword, occurrence_l)
my_table <- as.data.frame(table(distinct_items$occurrence_l))
colnames(my_table) <- c("Occurrence of l", "# nonwords")
my_table
  Occurrence of l # nonwords
1            coda          4
2           final          7
3           other         60

The z ratios are overall small, which might give us some confidence that there is no false negative here despite the possibly inflated standard errors due to multicollinearity.

For branching_onset : LoE:

plot_model(model_rep, type = "emm", terms = c("LoE [all]", "branching_onset"))

emtrends(model_rep, pairwise ~ branching_onset | LoE, var= "LoE", adjust = "mvt", infer = c(T,T))
$emtrends
LoE = 184:
 branching_onset LoE.trend      SE  df asymp.LCL asymp.UCL z.ratio p.value
 0                0.002248 0.00184 Inf  -0.00136   0.00586   1.221  0.2222
 1                0.000395 0.00198 Inf  -0.00348   0.00427   0.200  0.8418
 2                0.002090 0.00312 Inf  -0.00403   0.00821   0.669  0.5034

Results are averaged over the levels of: occurrence_l, V 
Confidence level used: 0.95 

$contrasts
LoE = 184:
 contrast                             estimate      SE  df asymp.LCL asymp.UCL z.ratio p.value
 branching_onset0 - branching_onset1  0.001853 0.00116 Inf -0.000809   0.00452   1.603  0.2303
 branching_onset0 - branching_onset2  0.000157 0.00266 Inf -0.005962   0.00628   0.059  0.9980
 branching_onset1 - branching_onset2 -0.001696 0.00264 Inf -0.007774   0.00438  -0.642  0.7874

Results are averaged over the levels of: occurrence_l, V 
Confidence level used: 0.95 
Conf-level adjustment: mvt method for 3 estimates 
P value adjustment: mvt method for 3 tests 

While the figures show that the slopes for LoE differ according to the value of branching_onset, the statistical tests reveal that these slopes are actually not significantly different from each other.

We then have 4 interactions which consist of two continuous variables: rec_voc : LoE, age : verbal_stm, age : rec_voc and verbal_stm : rec_voc

For rec_voc : LoE:

plot_model(model_rep, type = "emm", terms = c("LoE [all]", "rec_voc"))

Knowing that the interaction between rec_voc and LoE corresponds to the change in the slope of LoE for every one unit increase in rec_voc (or vice-versa), we can assess the significance of this interaction by looking at the contrast / difference between two slopes of rec_voc separated by one unit increase in LoE (the result will be independent from the choice of the two values separated by one unit).

list.LoE.red <- list(LoE = seq(median(df_reduced$LoE) - 0.5, median(df_reduced$LoE) + 0.5, by = 1))
emtrends(model_rep, pairwise ~ LoE, var = "rec_voc", at = list.LoE.red, adjust = "mvt", infer = c(T, T))
$emtrends
 LoE rec_voc.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
 184        0.0549 0.0206 Inf    0.0146    0.0953   2.669  0.0076
 184        0.0552 0.0207 Inf    0.0147    0.0957   2.673  0.0075

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

$contrasts
 contrast            estimate       SE  df asymp.LCL asymp.UCL z.ratio p.value
 LoE183.5 - LoE184.5 -0.00029 0.000211 Inf -0.000704  0.000123  -1.377  0.1684

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

We find that the estimate for the interaction is not significantly different from 0 - the p-value is larger than 0.05.

For age : verbal_stm:

plot_model(model_rep, type = "emm", terms = c("age [all]", "verbal_stm"))

list.verbal_stm.red <- list(verbal_stm = seq(median(df_reduced$verbal_stm) - 0.5, median(df_reduced$verbal_stm) + 0.5, by = 1))
emtrends(model_rep, pairwise ~ verbal_stm, var = "age", at = list.verbal_stm.red, adjust = "mvt", infer = c(T, T))
$emtrends
 verbal_stm age.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
        3.5   0.00595 0.0113 Inf   -0.0161    0.0280   0.528  0.5976
        4.5   0.00917 0.0154 Inf   -0.0210    0.0393   0.596  0.5511

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

$contrasts
 contrast                      estimate     SE  df asymp.LCL asymp.UCL z.ratio p.value
 verbal_stm3.5 - verbal_stm4.5 -0.00322 0.0121 Inf   -0.0269    0.0205  -0.266  0.7901

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

The p-value is much larger than 0.05.

For age : rec_voc:

plot_model(model_rep, type = "emm", terms = c("age [all]", "rec_voc"))

list.rec_voc.red <- list(rec_voc = seq(median(df_reduced$rec_voc) - 0.5, median(df_reduced$rec_voc) + 0.5, by = 1))
emtrends(model_rep, pairwise ~ rec_voc, var = "age", at = list.rec_voc.red, adjust = "mvt", infer = c(T, T))
$emtrends
 rec_voc age.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
      20   0.00751 0.0116 Inf   -0.0152    0.0302   0.648  0.5172
      21   0.00666 0.0116 Inf   -0.0161    0.0294   0.574  0.5662

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

$contrasts
 contrast              estimate      SE  df asymp.LCL asymp.UCL z.ratio p.value
 rec_voc20 - rec_voc21  0.00085 0.00156 Inf  -0.00221   0.00391   0.544  0.5861

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

The p-value for the interaction is higher than 0.05.

For verbal_stm : rec_voc:

plot_model(model_rep, type = "emm", terms = c("verbal_stm", "rec_voc"))

list.verbal_stm.red <- list(verbal_stm = seq(median(df_reduced$verbal_stm) - 0.5, median(df_reduced$verbal_stm) + 0.5, by = 1))
emtrends(model_rep, pairwise ~ verbal_stm, var = "rec_voc", at = list.verbal_stm.red, adjust = "mvt", infer = c(T, T))
$emtrends
 verbal_stm rec_voc.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
        3.5        0.0581 0.0281 Inf   0.00301    0.1133   2.067  0.0387
        4.5        0.0503 0.0222 Inf   0.00673    0.0938   2.263  0.0236

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

$contrasts
 contrast                      estimate     SE  df asymp.LCL asymp.UCL z.ratio p.value
 verbal_stm3.5 - verbal_stm4.5  0.00787 0.0329 Inf   -0.0565    0.0723   0.240  0.8106

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 

The p-value is once again much larger than 0.05.

Our investigation of the interactions shows that none of them is statistically significant. A possible option would then be to simplify the model by dropping these interactions. However, this amounts to model selection (for the fixed effects), which is warned against by a number of prominent statisticians. In what follows, we are therefore going to assess main effects despite the presence of interactions in the model.

6.1.2 Investigating the main effects

Given that none of the interactions we thought could be significant appears to be so, we can focus on the main effects in our model, i.e. the effects of the item-related categorical variables occurrence_l, branching_onset, and V, and the subject-related continuous variables rec_voc, verbal_stm, LoE, age, L1_syll_complexity and phono_awareness.

For occurrence_l:

plot_model(model_rep, type = "emm", terms = "occurrence_l")

summary(emmeans(model_rep, pairwise ~ occurrence_l, adjust = "mvt", side = "<"), infer = c(TRUE, TRUE), null = 0)$contrasts
 contrast      estimate    SE  df asymp.LCL asymp.UCL z.ratio p.value
 coda - final    -0.489 0.526 Inf      -Inf    0.6044  -0.930  0.3960
 coda - other    -1.244 0.398 Inf      -Inf   -0.4175  -3.129  0.0024
 final - other   -0.756 0.373 Inf      -Inf    0.0199  -2.026  0.0562

Results are averaged over the levels of: branching_onset, V 
Results are given on the log odds ratio (not the response) scale. 
Confidence level used: 0.95 
Conf-level adjustment: mvt method for 3 estimates 
P value adjustment: mvt method for 3 tests 
P values are left-tailed 

We set the parameter side to reflect our hypothesis that a nonword gets easier to repeat when shifting from l in coda position to l in final position to another structure.

We find that:

  • nonwords are significantly more difficult to repeat when l appears in internal coda position than when there is no l (coda - other, p = 0.002)

  • we don’t have evidence that nonwords are significantly more difficult to repeat when l appears in internal coda position than when l appears in final position (coda - final, p = 0.396)

  • there is a strong tendency for nonwords where l appears in final position to be more difficult to repeat than nonwords where there is no l (final - other, p = 0.056)

For branching_onset:

plot_model(model_rep, type = "emm", terms = "branching_onset")

summary(emmeans(model_rep, pairwise ~ branching_onset, adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)$contrasts
 contrast                            estimate    SE  df asymp.LCL asymp.UCL z.ratio p.value
 branching_onset0 - branching_onset1    0.847 0.184 Inf   0.47667       Inf   4.614  <.0001
 branching_onset0 - branching_onset2    1.875 0.499 Inf   0.86886       Inf   3.760  0.0002
 branching_onset1 - branching_onset2    1.028 0.506 Inf   0.00778       Inf   2.033  0.0485

Results are averaged over the levels of: occurrence_l, V 
Results are given on the log odds ratio (not the response) scale. 
Confidence level used: 0.95 
Conf-level adjustment: mvt method for 3 estimates 
P value adjustment: mvt method for 3 tests 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the more branching onsets in a nonword, the more difficult it is to repeat.

We observe that all the contrasts are significant, and that therefore:

  • A nonword is more difficult to repeat when it has 1 branching onset than when it has none (p < 0.001)
  • A nonword is more difficult to repeat when it has 2 branching onsets than when it has none (p < 0.001)
  • A nonword is more difficult to repeat when it has 2 branching onsets than when it has 1 (p = 0.048)

For V:

plot_model(model_rep, type = "emm", terms = "V")

summary(emmeans(model_rep, pairwise ~ V, adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)$contrasts
 contrast estimate    SE  df asymp.LCL asymp.UCL z.ratio p.value
 V1 - V2    0.0767 0.212 Inf   -0.3648       Inf   0.361  0.6670
 V1 - V3    0.5764 0.217 Inf    0.1261       Inf   2.660  0.0109
 V2 - V3    0.4997 0.215 Inf    0.0528       Inf   2.324  0.0273

Results are averaged over the levels of: occurrence_l, branching_onset 
Results are given on the log odds ratio (not the response) scale. 
Confidence level used: 0.95 
Conf-level adjustment: mvt method for 3 estimates 
P value adjustment: mvt method for 3 tests 
P values are right-tailed 

The parameter side corresponds to the hypothesis that the fewer vowels in a nonword, the easier it is to repeat.

We find two significant differences: a nonword with a single vowel is easier to repeat than a nonword with 3 vowels (p = 0.011), and a nonword with 2 vowels is easier to repeat than a nonword with 3 vowels (p = 0.027). We cannot conclude that there is any difference between 1 and 2 vowels (p = 0.667).

For rec_voc:

plot_model(model_rep, type = "emm", terms = "rec_voc [all]")

summary(emtrends(model_rep, ~ rec_voc, var = "rec_voc", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
 rec_voc rec_voc.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
    20.4        0.0552 0.0207 Inf    0.0212       Inf   2.672  0.0038

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the larger a child’s French receptive vocabulary, the higher the probability of correct repetition.

We observe a significant effect for rec_voc (p = 0.004): the size of a child’s receptive vocabulary positively impacts her/his ability to correctly repeat nonwords.

For verbal_stm:

plot_model(model_rep, type = "emm", terms = "verbal_stm")

summary(emtrends(model_rep, ~ verbal_stm, var = "verbal_stm", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
 verbal_stm verbal_stm.trend    SE  df asymp.LCL asymp.UCL z.ratio p.value
       3.88         -0.00826 0.123 Inf    -0.211       Inf  -0.067  0.5267

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the larger a child’s verbal short-term memory, the higher the probability of correct repetition.

We do not observe a significant effect of verbal_stm.

For LoE:

plot_model(model_rep, type = "emm", terms = "LoE [all]")

summary(emtrends(model_rep, ~ LoE, var = "LoE", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
 LoE LoE.trend      SE  df asymp.LCL asymp.UCL z.ratio p.value
 184   0.00158 0.00199 Inf   -0.0017       Inf   0.791  0.2145

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the longer the exposure to French, the higher the probability of correct repetition.

We do not observe a significant effect of LoE on the probability of correct repetition. This may be due, however, to an inflation of the standard error resulting from multicollinearity.

For age:

plot_model(model_rep, type = "emm", terms = "age [all]")

summary(emtrends(model_rep, ~ age, var = "age", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
  age age.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
 90.5   0.00716 0.0116 Inf   -0.0119       Inf   0.619  0.2681

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the older a child is, the higher the probability of correct repetition.

We do not observe a significant effect of age (p = 0.268).

For L1_syll_complexity:

plot_model(model_rep, type = "emm", terms = "L1_syll_complexity [all]")

summary(emtrends(model_rep, ~ age, var = "L1_syll_complexity", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
  age L1_syll_complexity.trend    SE  df asymp.LCL asymp.UCL z.ratio p.value
 90.5                    0.159 0.134 Inf    -0.062       Inf   1.184  0.1183

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the higher the complexity of the syllables of the L1, the higher the probability of correct repetition.

We do not observe a significant effect of L1_syll_complexity (p = 0.118).

For phono_awareness:

plot_model(model_rep, type = "emm", terms = "phono_awareness [all]")

summary(emtrends(model_rep, ~ phono_awareness, var = "phono_awareness", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
 phono_awareness phono_awareness.trend     SE  df asymp.LCL asymp.UCL z.ratio p.value
           -2.82                 0.125 0.0683 Inf    0.0131       Inf   1.837  0.0331

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We set the parameter side to reflect our hypothesis that the more developed a child’s phonological awareness, the higher the probability of correct repetition.

We observe a significant effect of phono_awareness (p = 0.033): the more developed the child’s phonological awareness, the better they are at correctly repeating the nonwords.

6.1.3 Summary for the main effects

Not all of our hypotheses were confirmed. We found the following results:

  • nonwords are significantly more difficult to repeat when l appears in internal coda position than when there is no l (p = 0.002).
  • the more branching onsets nonwords have, the more difficult they are to repeat (1 versus 0 BO: p < 0.001; 2 versus 0 BO: p < 0.001; 2 versus 1 BO: p = 0.048)
  • nonwords with 3 vowels are more difficult to repeat than nonwords with 1 or 2 vowels (p = 0.011 and p = 0.027, respectively)
  • the larger a child’s receptive vocabulary, the easier for them to repeat the nonwords (p = 0.004)
  • the more developed a child’s phonological awareness, the easier for them to repeat the nonwords (p = 0.033)

Additionally, we observed a statistical tendency: nonwords tend to be more difficult to repeat when l appears in final position than when there is no l (p = 0.056).

We did not observe effects of verbal_stm, age, or LoE. We also did not find a significant difference between l appearing in internal coda position in nonwords and l appearing in final position. We finally did not find an effect of the complexity of syllables in L1.

Our analysis overall proves to be quite simple, in the sense that we did not observe any significant interaction.

# Combined figure of the significant main effects, on the response (probability) scale.
# Fix: the title of p1 previously misspelled "occurrence_l" as "occurence_l".
p1 <- plot_model(model_rep, type = "emm", terms = "occurrence_l", show.values = TRUE, value.offset = .3, colors = "gs") +
  labs(title = "Effect of occurrence_l", x = "Location of /l/", y = "Predicted probability of correct repetition")
p2 <- plot_model(model_rep, type = "emm", terms = "branching_onset", show.values = TRUE, value.offset = .3, colors = "gs") +
  labs(title = "Effect of branching_onset", x = "Number of branching onsets", y = "Predicted probability of correct repetition")
p3 <- plot_model(model_rep, type = "emm", terms = "V", show.values = TRUE, value.offset = .3, colors = "gs") +
  labs(title = "Effect of V", x = "Number of vowels", y = "Predicted probability of correct repetition")
p4 <- plot_model(model_rep, type = "emm", terms = "rec_voc [all]", show.values = TRUE, value.offset = .3, colors = "gs") +
  labs(title = "Effect of rec_voc", x = "Size of the receptive vocabulary", y = "Predicted probability of correct repetition")
p5 <- plot_model(model_rep, type = "emm", terms = "phono_awareness [all]", show.values = TRUE, value.offset = .3, colors = "gs") +
  labs(title = "Effect of phonological awareness", x = "Phonological awareness", y = "Predicted probability of correct repetition")

# Arrange the five panels in a 3-column grid (gridExtra)
grid.arrange(p1, p2, p3, p4, p5, ncol = 3)

6.2 Analyzing the random effects

While our analysis is centered on fixed effects, it is interesting to pay attention to the distribution of values of the different random effects in our model.

p <- plot_model(model_rep, type = "re", sort.est = "sort.all", grid = F, transform = NULL)

The most interesting source of information is likely the distribution of the random intercepts of the different children’s L1:

p[[5]] + scale_color_grey()

We can notice that the languages closest to French genealogically speaking, i.e., Spanish, Italian, Portuguese and Brazilian Portuguese have negative intercepts (while accounting for the complexity of their syllables with L1_syll_complexity as a fixed continuous predictor). Romanian, however, has the third highest positive intercept. The distribution does not seem suggestive of an effect of earlier bilingualism (before the acquisition of French) - the intercepts for Arabic/Italian, Russian/Chechen etc. are rather in the middle of the distribution.

p[[1]] + scale_color_grey()

There is no striking element in the distribution observed for the nonwords as random intercepts.

We can finally display the values of the various random slopes for occurrence_l:

p[[2]] + scale_color_grey()

p[[3]] + scale_color_grey()

p[[4]] + scale_color_grey()

Nothing striking appears.

6.3 Assessing the effect of LoE and occurrence_l:LoE with the random-intercepts-only model

We can assess the effect of LoE and occurrence_l:LoE with our initial, random-intercepts-only, model, as it is supposed to be less conservative than the model with the additional random slope for occurrence_l. If we don’t reach significance with this model (for which there is no strong multicollinearity), we can be more confident that the previous lack of results is not due to multicollinearity.

emtrends(model_random_intercept, pairwise ~ branching_onset | LoE, var= "LoE", adjust = "mvt", infer = c(T,T))
$emtrends
LoE = 184:
 branching_onset LoE.trend      SE  df asymp.LCL asymp.UCL z.ratio p.value
 0                0.002481 0.00138 Inf -0.000227   0.00519   1.796  0.0725
 1                0.000619 0.00156 Inf -0.002440   0.00368   0.397  0.6917
 2                0.002364 0.00290 Inf -0.003311   0.00804   0.817  0.4142

Results are averaged over the levels of: occurrence_l, V 
Confidence level used: 0.95 

$contrasts
LoE = 184:
 contrast                             estimate      SE  df asymp.LCL asymp.UCL z.ratio p.value
 branching_onset0 - branching_onset1  0.001863 0.00115 Inf -0.000792   0.00452   1.618  0.2234
 branching_onset0 - branching_onset2  0.000117 0.00268 Inf -0.006061   0.00630   0.044  0.9989
 branching_onset1 - branching_onset2 -0.001746 0.00266 Inf -0.007880   0.00439  -0.656  0.7789

Results are averaged over the levels of: occurrence_l, V 
Confidence level used: 0.95 
Conf-level adjustment: mvt method for 3 estimates 
P value adjustment: mvt method for 3 tests 
summary(emtrends(model_random_intercept, ~ LoE, var = "LoE", adjust = "mvt", side = ">"), infer = c(TRUE, TRUE), null = 0)
 LoE LoE.trend      SE  df asymp.LCL asymp.UCL z.ratio p.value
 184   0.00182 0.00159 Inf -0.000786       Inf   1.149  0.1253

Results are averaged over the levels of: occurrence_l, branching_onset, V 
Confidence level used: 0.95 
P values are right-tailed 

We do not observe any significant effect.